# Author: Mark Richardson
# Date started: 11/12/2022
# Date finished: 11/12/2022
# Purpose: Format pid performance ratings
# Proofed 1/05/2023

# Load packages
library(dplyr)
library(ggplot2)
library(rstan)
library(stringr)

# Load data
load("data/ratings/performance_ratings/03_performance_fitted_models_pid.RData")

# Function to scale ratings to be N(0, 1)

localID <- function(stan_object) {
  
  x <- as.matrix(stan_object, pars = "x")
  
  x <- (x - mean(x)) / sd(x)
  
  x_out <- tibble(mn = apply(x, MARGIN = 2, FUN = mean),
                  sd = apply(x, MARGIN = 2, FUN = sd),
                  lb = apply(x, MARGIN = 2, FUN = quantile, probs = 0.025),
                  ub = apply(x, MARGIN = 2, FUN = quantile, probs = 0.975),
                  agency_index = 1:ncol(x))
  
  return(x_out)
    
}

#### Hierarchical performance ratings with informed priors baseline ####

x_hier_base <- localID(perf_inf_hier_base) %>%
  full_join(., agency_data, by = "agency_index") %>%
  left_join(., perf_stan %>% 
              select(agency_index, agency_rated, n_ratings_agency) %>%
              distinct(), by = c("agency_index", "agency_rated")) %>%
  select(agency_rated, mn:ub, n_ratings_agency) %>%
  rename(agency = agency_rated,
         perf_rating_inf_hier_cmp = mn,
         perf_rating_inf_hier_cmp_sd = sd,
         perf_rating_inf_hier_cmp_lb = lb,
         perf_rating_inf_hier_cmp_ub = ub,
         n_pid_perf_ratings = n_ratings_agency)

#### Performance ratings with informed priors using and pid and appointee vars ####

x_hier_pid <- localID(perf_inf_hier_pid) %>%
  full_join(., agency_data, by = "agency_index") %>%
  select(agency_rated, mn:ub) %>%
  rename(agency = agency_rated,
         perf_rating_inf_hier_pid = mn,
         perf_rating_inf_hier_pid_sd = sd,
         perf_rating_inf_hier_pid_lb = lb,
         perf_rating_inf_hier_pid_ub = ub)

#### Hierarchical performance ratings with naive priors baseline ####

x_nv_base <- localID(perf_naive_hier_base) %>%
  full_join(., agency_data, by = "agency_index") %>%
  select(agency_rated, mn:ub) %>%
  rename(agency = agency_rated,
         perf_rating_nv_hier_cmp = mn,
         perf_rating_nv_hier_cmp_sd = sd,
         perf_rating_nv_hier_cmp_lb = lb,
         perf_rating_nv_hier_cmp_ub = ub)

#### Performance ratings with naive priors ####

x_nv_pid <- localID(perf_naive_hier_pid) %>%
  full_join(., agency_data, by = "agency_index") %>%
  select(agency_rated, mn:ub) %>%
  rename(agency = agency_rated,
         perf_rating_nv_hier_pid = mn,
         perf_rating_nv_hier_pid_sd = sd,
         perf_rating_nv_hier_pid_lb = lb,
         perf_rating_nv_hier_pid_ub = ub)

x <- full_join(x_hier_base, x_hier_pid)

x <- full_join(x, x_nv_base)

x <- full_join(x, x_nv_pid)

#### Save formatted data ####

perf_ratings_pid_out <- x %>% 
  select(agency,
         starts_with("perf_") & contains("_pid"),
         starts_with("perf_"),
         n_pid_perf_ratings)

# Require 5 ratings per agency

perf_ratings_pid_out <- perf_ratings_pid_out %>% filter(n_pid_perf_ratings >= 5)

rm(x, x_hier_base, x_hier_pid, x_nv_base, x_nv_pid)

#### Format separate pid ratings ####

#### Format ratings with dept hierarchy ####

# Get draws of latent traits
x <- as.matrix(perf_pid_sep, pars = c("x_dem", "x_rep", "x_fixed"))

x_std <- (x - mean(x)) / sd(x)

mean(x_std)

sd(x_std)

x_std <- as_tibble(x_std)

for (i in 1:for_stan_pid_sep$A_pid) {
  
  pid_vars <- paste0(c("x_dem[", "x_rep["), i, "]")
  
  new_var <- paste0("x_dif[", i, "]")
  
  x_std <- x_std %>%
    mutate({{ new_var }} := .data[[pid_vars[1]]] - .data[[pid_vars[2]]])
  
}

x_pid <- tibble(perf_rating = apply(x_std, MARGIN = 2, FUN = mean),
                perf_rating_sd = apply(x_std, MARGIN = 2, FUN = sd),
                perf_rating_lb = apply(x_std, MARGIN = 2, FUN = quantile, probs = 0.025),
                perf_rating_ub = apply(x_std, MARGIN = 2, FUN = quantile, probs = 0.975),
                perf_rating_lb_90 = apply(x_std, MARGIN = 2, FUN = quantile, probs = 0.05),
                perf_rating_ub_90 = apply(x_std, MARGIN = 2, FUN = quantile, probs = 0.95),
                par = names(x_std))

# Create fixed indicator and agency index

x_pid <- x_pid %>%
  mutate(fixed = as.numeric(stringr::str_detect(par, "fixed")),
         agency_index = stringr::str_sub(par,
                                         start = stringr::str_locate(par, "\\[")[,1] + 1,
                                         end =   stringr::str_locate(par, "\\]")[,1] - 1),
         agency_index = as.numeric(agency_index)) %>%
  mutate(type = stringr::str_sub(par,
                                 start = stringr::str_locate(par, "\\_")[,1] + 1,
                                 end =   stringr::str_locate(par, "\\[")[,1] - 1))

x_pid_dem <- x_pid %>%
  filter(type == "dem") %>%
  select(!c("par", "type", "perf_rating_lb_90", "perf_rating_ub_90")) %>%
  rename(perf_rating_dem = perf_rating,
         perf_rating_dem_sd = perf_rating_sd,
         perf_rating_dem_lb = perf_rating_lb,
         perf_rating_dem_ub = perf_rating_ub)

x_pid_rep <- x_pid %>%
  filter(type == "rep") %>%
  select(!c("par", "type", "perf_rating_lb_90", "perf_rating_ub_90")) %>%
  rename(perf_rating_rep = perf_rating,
         perf_rating_rep_sd = perf_rating_sd,
         perf_rating_rep_lb = perf_rating_lb,
         perf_rating_rep_ub = perf_rating_ub)

x_pid_dif <- x_pid %>%
  filter(type == "dif") %>%
  select(!c("par", "type")) %>%
  rename(perf_rating_dif = perf_rating,
         perf_rating_dif_sd = perf_rating_sd,
         perf_rating_dif_lb = perf_rating_lb,
         perf_rating_dif_ub = perf_rating_ub,
         perf_rating_dif_lb_90 = perf_rating_lb_90,
         perf_rating_dif_ub_90 = perf_rating_ub_90)


x_pid_fxd <- x_pid %>%
  filter(type == "fixed") %>%
  select(!c("par", "type", "perf_rating_lb_90", "perf_rating_ub_90")) %>%
  rename(perf_rating_fxd = perf_rating,
         perf_rating_fxd_sd = perf_rating_sd,
         perf_rating_fxd_lb = perf_rating_lb,
         perf_rating_fxd_ub = perf_rating_ub)

x_sep_dept <- full_join(x_pid_dem, x_pid_rep) %>%
  full_join(x_pid_dif) %>%
  bind_rows(x_pid_fxd) %>%
  full_join(agency_data_pid, by = c("agency_index", "fixed")) %>%
  select(!c("agency_index", "fixed")) %>%
  select(agency_rated, everything()) %>%
  rename(agency = agency_rated)

rm(x, x_std, x_pid, x_pid_dem, x_pid_rep, x_pid_dif, x_pid_fxd) # clean up

#### Format ratings with party hierarchy ####

# Get draws of latent traits
x <- as.matrix(perf_pid_sep_pty_grp, pars = c("x_dem", "x_rep", "x_fixed"))

x_std <- (x - mean(x)) / sd(x)

mean(x_std)

sd(x_std)

x_std <- as_tibble(x_std)

for (i in 1:for_stan_pid_sep_pty_grp$A_pid) {
  
  pid_vars <- paste0(c("x_dem[", "x_rep["), i, "]")
  
  new_var <- paste0("x_dif[", i, "]")
  
  x_std <- x_std %>%
    mutate({{ new_var }} := .data[[pid_vars[1]]] - .data[[pid_vars[2]]])
  
}

x_pid <- tibble(perf_rating = apply(x_std, MARGIN = 2, FUN = mean),
                perf_rating_sd = apply(x_std, MARGIN = 2, FUN = sd),
                perf_rating_lb = apply(x_std, MARGIN = 2, FUN = quantile, probs = 0.025),
                perf_rating_ub = apply(x_std, MARGIN = 2, FUN = quantile, probs = 0.975),
                perf_rating_lb_90 = apply(x_std, MARGIN = 2, FUN = quantile, probs = 0.05),
                perf_rating_ub_90 = apply(x_std, MARGIN = 2, FUN = quantile, probs = 0.95),
                par = names(x_std))

# Create fixed indicator and agency index

x_pid <- x_pid %>%
  mutate(fixed = as.numeric(stringr::str_detect(par, "fixed")),
         agency_index = stringr::str_sub(par,
                                         start = stringr::str_locate(par, "\\[")[,1] + 1,
                                         end =   stringr::str_locate(par, "\\]")[,1] - 1),
         agency_index = as.numeric(agency_index)) %>%
  mutate(type = stringr::str_sub(par,
                                 start = stringr::str_locate(par, "\\_")[,1] + 1,
                                 end =   stringr::str_locate(par, "\\[")[,1] - 1))

x_pid_dem <- x_pid %>%
  filter(type == "dem") %>%
  select(!c("par", "type", "perf_rating_lb_90", "perf_rating_ub_90")) %>%
  rename(perf_rating_dem_pty = perf_rating,
         perf_rating_dem_pty_sd = perf_rating_sd,
         perf_rating_dem_pty_lb = perf_rating_lb,
         perf_rating_dem_pty_ub = perf_rating_ub)

x_pid_rep <- x_pid %>%
  filter(type == "rep") %>%
  select(!c("par", "type", "perf_rating_lb_90", "perf_rating_ub_90")) %>%
  rename(perf_rating_rep_pty = perf_rating,
         perf_rating_rep_pty_sd = perf_rating_sd,
         perf_rating_rep_pty_lb = perf_rating_lb,
         perf_rating_rep_pty_ub = perf_rating_ub)

x_pid_dif <- x_pid %>%
  filter(type == "dif") %>%
  select(!c("par", "type")) %>%
  rename(perf_rating_dif_pty = perf_rating,
         perf_rating_dif_pty_sd = perf_rating_sd,
         perf_rating_dif_pty_lb = perf_rating_lb,
         perf_rating_dif_pty_ub = perf_rating_ub,
         perf_rating_dif_pty_lb_90 = perf_rating_lb_90,
         perf_rating_dif_pty_ub_90 = perf_rating_ub_90)


x_pid_fxd <- x_pid %>%
  filter(type == "fixed") %>%
  select(!c("par", "type", "perf_rating_lb_90", "perf_rating_ub_90")) %>%
  rename(perf_rating_fxd_pty = perf_rating,
         perf_rating_fxd_pty_sd = perf_rating_sd,
         perf_rating_fxd_pty_lb = perf_rating_lb,
         perf_rating_fxd_pty_ub = perf_rating_ub)

x_sep_pty <- full_join(x_pid_dem, x_pid_rep) %>%
  full_join(x_pid_dif) %>%
  bind_rows(x_pid_fxd) %>%
  full_join(agency_data_pid, by = c("agency_index", "fixed")) %>%
  select(!c("agency_index", "fixed")) %>%
  select(agency_rated, everything()) %>%
  rename(agency = agency_rated)

rm(x, x_std, x_pid, x_pid_dem, x_pid_rep, x_pid_dif, x_pid_fxd) # clean up

# Join separate partisan ratings

x_sep <- full_join(x_sep_dept, x_sep_pty)

nrow(x_sep_dept) == nrow(x_sep_pty)
nrow(x_sep_dept) == nrow(x_sep)

rm(x_sep_dept, x_sep_pty)

# Create indicator for sig. differences

x_sep <- x_sep %>%
  mutate(sig_dif        = if_else(perf_rating_dif_lb        > 0 | perf_rating_dif_ub        < 0, 1, 0),
         sig_dif_pty    = if_else(perf_rating_dif_pty_lb    > 0 | perf_rating_dif_pty_ub    < 0, 1, 0),
         sig_dif_90     = if_else(perf_rating_dif_lb_90     > 0 | perf_rating_dif_ub_90     < 0, 1, 0),
         sig_dif_pty_90 = if_else(perf_rating_dif_pty_lb_90 > 0 | perf_rating_dif_pty_ub_90 < 0, 1, 0))

table(dif = x_sep$sig_dif, dif_pty = x_sep$sig_dif_pty)

table(dif = x_sep$sig_dif_90, dif_pty = x_sep$sig_dif_pty_90)

# Subset to agencies with at least 5 ratings from each party

x_sep <- x_sep %>% filter(n_dem_ratings_agency >= 5 & n_rep_ratings_agency >= 5)

# Look at number of sig. difs

table(dif = x_sep$sig_dif, dif_pty = x_sep$sig_dif_pty)

table(dif = x_sep$sig_dif_90, dif_pty = x_sep$sig_dif_pty_90)

#### Final formatting ####

rm(list = ls()[!str_detect(ls(), "x_sep|perf_ratings_pid_out")])

# Joint sets of ratings
perf_ratings_pid <- full_join(perf_ratings_pid_out, x_sep)

# Format order of variables 
perf_ratings_pid <- perf_ratings_pid %>%
  select(agency, starts_with("perf_"), everything())

#### Save the formatted partisan estimates ####

save(perf_ratings_pid, file = "data/ratings/performance_ratings/05_performance_ratings_pid_fmtd.RData")














